## "Where are the missing dead" replication
##
## Reproduce: The DD Estimates
##
## 			  - Figure 1 (log transformed)
##			  - Appendix Table A3, A4, and A5
##
##			  - Figure A6 (raw data)
##			  - Appendix Table A9, A10, and A11
## 
## Input:    main.dta

## Guoer Liu
## 7/27/2022 


setwd("~/Dropbox/Missing dead/replication/")


library(haven)
library(estimatr)
library(scales)
library(modelsummary) ## modelsummary(): for lm_robust object
library(extrafont) ## change fonts in plot


### Function ###

coefPlot <- function(coef.dataframe, 
					 x.lab.min = NULL, x.lab.max = NULL, x.lab.step = NULL,
					 legend = TRUE, leg.pos = "topright"){
	# Args:
	#	coef.dataframe: coefficient summary (|coef| x 2)
	#	x.lab.min:  x-axis label minimum
	#	x.lab.max:  x-axis label maximum
	#	x.lab.step: x-axis label step 
	#   legend:     logical parameter
	#   legend:     position
	# Outputs:
	#	coeffient plot
	y.names <- rownames(coef.dataframe)
	est <- coef.dataframe[, 1]
	est.se <- coef.dataframe[, 2]


	y.ind <- c(1.5, 1.9, 3.3, 3.7, 5.1, 5.5)
	y.ind <- rev(y.ind)

	par(mar= c(5,7,1,1))
	plot(NA, xlim = c(x.lab.min, x.lab.max), 
	         ylim = c(0.9, 6), 
	         type = "n", 
	         xlab = "DD Coefficient", 
	         ylab = "", axes = F)
	abline(v = 0, col = "gray")
	segments(x0 = est - qnorm(0.95) * est.se, 
	         y0 = y.ind, 
	         x1 = est + qnorm(0.95) * est.se, 
	         y1 = y.ind, 
	         col = alpha("black", 0.45), lwd = 6)
	segments(x0 = est - qnorm(0.975) * est.se, 
	         y0 = y.ind, 
	         x1 = est + qnorm(0.975) * est.se, 
	         y1 = y.ind, 
	         col = "black", lwd = 2)
	points(est, y.ind, cex = 2, pch = 19, 
	       col = "black", bg = "black")
	axis(1, at = seq(x.lab.min, x.lab.max, x.lab.step), cex.axis = 1, las = 1)
	axis(2, at = y.ind, 
			labels = rep(c("Deaths", "Acc. Counts"), 3), 
			las = 2, tick = F, cex.axis = 1)
	axis(2, at = 5.9, labels = "Aggregate", las = 2, tick = F, 
		 	cex.axis = 1.2, font = 2)
	axis(2, at = 4.1, labels = "Traffic", las = 2, tick = F, 
		 	cex.axis = 1.2, font = 2)
	axis(2, at = 2.3, labels = "Others", las = 2, tick = F, 
		 	cex.axis = 1.2, font = 2)
	if (legend == TRUE){
		legend(leg.pos, title = "CI", 
			   legend = c("95%", "90%"), 
			   col = c("black", alpha("black", 0.45)), 
			   lty = c(1, 1), cex = 0.8,
			   lwd = c(1, 4))
	}
}


mydata <- read_dta("main.dta", encoding = "UTF-8")
mydata <- as.data.frame(mydata)

outcome <- c("deathsum_py_lg", "totcount_py_lg",
			 "traf_deathsum_py_lg", "traf_totcount_py_lg",
			 "othr_deathsum_py_lg", "othr_totcount_py_lg")

outcome.raw <- c("deathsum_py", "totcount_py",
			 	 "traf_deathsum_py", "traf_totcount_py",
			 	 "othr_deathsum_py", "othr_totcount_py")

cov.names <- c("log_pop2", "rural_pop_pct",
			   "log_light", "log_total_tax_rev_pc",
			   "log_coal_produ0", "log_mine_indwage",
			   "log_traf_indwage",
			   "sec_tty", "sec_age", 
			   "sec_promotion", "gov_tty", 
			   "gov_age", "gov_promotion")

t.var <- c("did", "treat_time", "treat_group")


rhs.1 <- paste('~', paste(t.var, collapse = " + "))
rhs.2 <- paste('~', paste(c(t.var, cov.names), collapse = " + "))
rhs.3 <- paste('~', paste(c(t.var, cov.names), collapse = " + "), 
						  "+ factor(year)")
rhs.4 <- paste('~', paste(c(t.var, cov.names), collapse = " + "), 
						  "+ factor(year) + factor(prov_id)")
rhs.list <- list(rhs.1, rhs.2, rhs.3, rhs.4)


estimate <- NULL
SE <- NULL
mod.out <- list()

estimate.raw <- NULL
SE.raw <- NULL
mod.out.raw <- list()

## log transformed outcome variable ##

for(i in outcome){
	for(j in 1:length(rhs.list)){
		formula <- as.formula(paste(i, rhs.list[[j]]))
		mod <- lm_robust(formula, cluster = prov_id, data = mydata)
		estimate <- c(estimate, coefficients(mod)['did'])
		SE <- c(SE, summary(mod)$coefficients['did', 2])
		mod.out <- append(mod.out, list(mod))
		## rank deficient due to fixed effects drop 1
	}
}


## raw outcome variable ##

for(i in outcome.raw){
	for(j in 1:length(rhs.list)){
		formula <- as.formula(paste(i, rhs.list[[j]]))
		mod <- lm_robust(formula, cluster = prov_id, data = mydata)
		estimate.raw <- c(estimate.raw, coefficients(mod)['did'])
		SE.raw <- c(SE.raw, summary(mod)$coefficients['did', 2])
		mod.out.raw <- append(mod.out.raw, list(mod))
		## rank deficient due to fixed effects drop 1
	}
}


## Plot Fig 1: log transformed

coef.df <- data.frame(coef = estimate[seq(4, length(estimate), 4)],
					  se = SE[seq(4, length(SE), 4)])

#pdf("f1_did.pdf", family = "Times New Roman", width = 6, height = 6)
coefPlot(coef.df, x.lab.min = -1, x.lab.max = 1.5, x.lab.step = 0.5)
#dev.off()

## Plot Fig A6: raw data

coef.raw.df <- data.frame(coef = estimate.raw[seq(4, length(estimate.raw), 4)],
					      se = SE.raw[seq(4, length(SE.raw), 4)])

#pdf("fA6_did_raw.pdf", family = "Times New Roman", width = 6, height = 6)
coefPlot(coef.raw.df, x.lab.min = -150, x.lab.max = 100, x.lab.step = 50)
#dev.off()

## Tables: log transformed

tab.a3 <- mod.out[1 : 8]
names(tab.a3) <- paste0("(", seq(1, 8, 1), ") ", 
						rep(c("Death (all)", "Acc. count (all)"), each = 4))

tab.a4 <- mod.out[c(10:12, 14:16)]
names(tab.a4) <- paste0("(", seq(1, 6, 1), ") ", 
						rep(c("Death (road)", "Acc. count (road)"), each = 3))

tab.a5 <- mod.out[c(18:20, 22:24)]
names(tab.a5) <- paste0("(", seq(1, 6, 1), ") ", 
						rep(c("Death (others)", "Acc. count (others)"), each = 3))


print(modelsummary(tab.a3, 
	  coef_map = c("did", "treat_time", "treat_group"),
	  output = "table.tex",
	  stars = FALSE))

print(modelsummary(tab.a4, 
	  coef_map = c("did", "treat_time", "treat_group"),
	  output = "table.tex",
	  stars = FALSE))

print(modelsummary(tab.a5, 
	  coef_map = c("did", "treat_time", "treat_group"),
	  output = "table.tex",
	  stars = FALSE))


## Tables: raw data


tab.a3.raw <- mod.out.raw[1 : 8]
names(tab.a3.raw) <- paste0("(", seq(1, 8, 1), ") ", 
						rep(c("Death (all)", "Acc. count (all)"), each = 4))

tab.a4.raw <- mod.out.raw[c(10:12, 14:16)]
names(tab.a4.raw) <- paste0("(", seq(1, 6, 1), ") ", 
						rep(c("Death (road)", "Acc. count (road)"), each = 3))

tab.a5.raw <- mod.out.raw[c(18:20, 22:24)]
names(tab.a5.raw) <- paste0("(", seq(1, 6, 1), ") ", 
						rep(c("Death (others)", "Acc. count (others)"), each = 3))


print(modelsummary(tab.a3.raw, 
	  coef_map = c("did", "treat_time", "treat_group"),
	  output = "table.tex",
	  stars = FALSE))

print(modelsummary(tab.a4.raw, 
	  coef_map = c("did", "treat_time", "treat_group"),
	  output = "table.tex",
	  stars = FALSE))

print(modelsummary(tab.a5.raw, 
	  coef_map = c("did", "treat_time", "treat_group"),
	  output = "table.tex",
	  stars = FALSE))






















